#filtering for genes that have p<0.05 and fc>2 for at least one of the six different pairwise comparisons. These genes are referred to as "DEGs", differentially expressed genes
b <- filter(a, queenfertile_p<0.05 & abs(queenfertile_fold)>2 | queeninfertile_p<0.05 & abs(queeninfertile_fold)>2 | queenforager_p<0.05 & abs(queenforager_fold)>2 | fertileinfertile_p<0.05 & abs(fertileinfertile_fold)>2 | fertileforager_p<0.05 & abs(fertileforager_fold)>2 | infertileforager_p<0.05 & abs(infertileforager_fold)>2)
#selecting just the expression values
s <- mutate(b) %>% select(queen, fertile, infertile, forager)
#performing nmds
s.mds <- metaMDS(s)
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.05361148
## Run 1 stress 0.4211336
## Run 2 stress 0.09211227
## Run 3 stress 0.08446665
## Run 4 stress 0.08107626
## Run 5 stress 0.08590129
## Run 6 stress 0.09901697
## Run 7 stress 0.09068718
## Run 8 stress 0.09663558
## Run 9 stress 0.4211325
## Run 10 stress 0.08958067
## Run 11 stress 0.08854201
## Run 12 stress 0.09118476
## Run 13 stress 0.08936956
## Run 14 stress 0.4211304
## Run 15 stress 0.08725357
## Run 16 stress 0.08669621
## Run 17 stress 0.09250578
## Run 18 stress 0.08994482
## Run 19 stress 0.08985221
## Run 20 stress 0.08501653
## *** No convergence -- monoMDS stopping criteria:
## 20: scale factor of the gradient < sfgrmin
s.mds
##
## Call:
## metaMDS(comm = s)
##
## global Multidimensional Scaling using monoMDS
##
## Data: wisconsin(sqrt(s))
## Distance: bray
##
## Dimensions: 2
## Stress: 0.05361148
## Stress type 1, weak ties
## No convergent solutions - best solution after 20 tries
## Scaling: centring, PC rotation, halfchange scaling
## Species: expanded scores based on 'wisconsin(sqrt(s))'
#plotting using points
plot(s.mds, type = "p")
#My NMDS plot is almost exactly the same as the published one, but mine is rotated in the opposite orientation
#plotting using text to show that the red labels match up to their positions in the published figure.
plot(s.mds, type = "t")
# filtering for DEGs based on having just one pairwise comparison qualify as significant
b <- filter(a, queenfertile_p<0.05 & abs(queenfertile_fold)>2 | queeninfertile_p<0.05 & abs(queeninfertile_fold)>2 | queenforager_p<0.05 & abs(queenforager_fold)>2 | fertileinfertile_p<0.05 & abs(fertileinfertile_fold)>2 | fertileforager_p<0.05 & abs(fertileforager_fold)>2 | infertileforager_p<0.05 & abs(infertileforager_fold)>2)
#filtering for DEGs expressed above 0 in each caste
q_nonzero <- filter(b, queen != 0)
q_nonzero <- mutate(q_nonzero) %>% select(ID)
fert_nonzero <- filter(b, fertile != 0)
fert_nonzero <- mutate(fert_nonzero) %>% select(ID)
infert_nonzero <- filter(b, infertile != 0)
infert_nonzero <- mutate(infert_nonzero) %>% select(ID)
for_nonzero <- filter(b, forager != 0)
for_nonzero <- mutate(for_nonzero) %>% select(ID)
#creating the diagram
venn.diagram(list(Queen=q_nonzero$ID, Forager=for_nonzero$ID, Fertile=fert_nonzero$ID, Infertile=infert_nonzero$ID), "replication_venn0.tiff", fill = c("blue", "red", "yellow", "green"), cex=2, cat.cex=1.8)
## [1] 1
# filtering for at least one significant pairwise comparison for the set of comparisons involving the caste in question
q_nonzero <- filter(a, queen != 0 & ( queenfertile_p<0.05 & abs(queenfertile_fold)>2 | queeninfertile_p<0.05 & abs(queeninfertile_fold)>2 | queenforager_p<0.05 & abs(queenforager_fold)>2))
q_nonzero <- mutate(q_nonzero) %>% select(ID)
fert_nonzero <- filter(a, fertile != 0 & (queenfertile_p<0.05 & abs(queenfertile_fold)>2 | fertileinfertile_p<0.05 & abs(fertileinfertile_fold)>2 | fertileforager_p<0.05 & abs(fertileforager_fold)>2))
fert_nonzero <- mutate(fert_nonzero) %>% select(ID)
infert_nonzero <- filter(a, infertile != 0 & (fertileinfertile_p<0.05 & abs(fertileinfertile_fold)>2 | queeninfertile_p<0.05 & abs(queeninfertile_fold)>2 | infertileforager_p<0.05 & abs(infertileforager_fold)>2))
infert_nonzero <- mutate(infert_nonzero) %>% select(ID)
for_nonzero <- filter(a, forager != 0 & (infertileforager_p<0.05 & abs(infertileforager_fold)>2 | fertileforager_p<0.05 & abs(fertileforager_fold)>2 | queenforager_p<0.05 & abs(queenforager_fold)>2))
for_nonzero <- mutate(for_nonzero) %>% select(ID)
venn.diagram(list(Queen=q_nonzero$ID, Forager=for_nonzero$ID, Fertile=fert_nonzero$ID, Infertile=infert_nonzero$ID), "replication_venn1.tiff", fill = c("blue", "red", "yellow", "green"), cex=2,cat.cex=1.8)
## [1] 1
# filtering for significance in all pairwise comparisons for the set of comparisons involving the caste in question
q_nonzero <- filter(a, queen != 0 & queenfertile_p<0.05 & abs(queenfertile_fold)>2 & queeninfertile_p<0.05 & abs(queeninfertile_fold)>2 & queenforager_p<0.05 & abs(queenforager_fold)>2)
q_nonzero <- mutate(q_nonzero) %>% select(ID)
fert_nonzero <- filter(a, fertile != 0 & queenfertile_p<0.05 & abs(queenfertile_fold)>2 & fertileinfertile_p<0.05 & abs(fertileinfertile_fold)>2 & fertileforager_p<0.05 & abs(fertileforager_fold)>2)
fert_nonzero <- mutate(fert_nonzero) %>% select(ID)
infert_nonzero <- filter(a, infertile != 0 & fertileinfertile_p<0.05 & abs(fertileinfertile_fold)>2 & queeninfertile_p<0.05 & abs(queeninfertile_fold)>2 & infertileforager_p<0.05 & abs(infertileforager_fold)>2)
infert_nonzero <- mutate(infert_nonzero) %>% select(ID)
for_nonzero <- filter(a, forager != 0 & infertileforager_p<0.05 & abs(infertileforager_fold)>2 & fertileforager_p<0.05 & abs(fertileforager_fold)>2 & queenforager_p<0.05 & abs(queenforager_fold)>2)
for_nonzero <- mutate(for_nonzero) %>% select(ID)
venn.diagram(list(Queen=q_nonzero$ID, Forager=for_nonzero$ID, Fertile=fert_nonzero$ID, Infertile=infert_nonzero$ID), "replication_venn2.tiff", fill = c("blue", "red", "yellow", "green"), cex=3,cat.cex=1.8)
## [1] 1
# same filtering as above but only on the basis of p value
q_nonzero <- filter(a, queen != 0 & queenfertile_p<0.05 & queeninfertile_p<0.05 & queenforager_p<0.05)
q_nonzero <- mutate(q_nonzero) %>% select(ID)
fert_nonzero <- filter(a, fertile != 0 & queenfertile_p<0.05 & fertileinfertile_p<0.05 & fertileforager_p<0.05)
fert_nonzero <- mutate(fert_nonzero) %>% select(ID)
infert_nonzero <- filter(a, infertile != 0 & fertileinfertile_p<0.05 & queeninfertile_p<0.05 & infertileforager_p<0.05)
infert_nonzero <- mutate(infert_nonzero) %>% select(ID)
for_nonzero <- filter(a, forager != 0 & infertileforager_p<0.05 & fertileforager_p<0.05 & queenforager_p<0.05)
for_nonzero <- mutate(for_nonzero) %>% select(ID)
venn.diagram(list(Queen=q_nonzero$ID, Forager=for_nonzero$ID, Fertile=fert_nonzero$ID, Infertile=infert_nonzero$ID), "replication_venn3.tiff", fill = c("blue", "red", "yellow", "green"), cex=3,cat.cex=1.8)
## [1] 1
#now significance is only determined on the basis of p value
qfert <- filter(a, queenfertile_p<0.05)
qfert <- mutate(qfert) %>% select(ID)
qinfert <- filter(a, queeninfertile_p<0.05)
qinfert <- mutate(qinfert) %>% select(ID)
qfor <- filter(a, queenforager_p<0.05)
qfor <- mutate(qfor) %>% select(ID)
fertinfert <- filter(a, fertileinfertile_p<0.05)
fertinfert <- mutate(fertinfert) %>% select(ID)
fertfor <- filter(a, fertileforager_p<0.05)
fertfor <- mutate(fertfor) %>% select(ID)
infertfor <- filter(a, infertileforager_p<0.05)
infertfor <- mutate(infertfor) %>% select(ID)
#defining two vectors, one containing the number of genes filtered into each category, and one containing labels
slices <- c(nrow(qfert), nrow(qinfert), nrow(qfor), nrow(fertinfert), nrow(fertfor), nrow(infertfor))
lbls <- c("q vs. fer", "q vs. inf", "q vs. for", "fer vs. inf", "fer vs. for", "inf vs. for")
lbls <- paste(lbls, slices) # add percents to labels
#creating a small data frame from the vectors above to hand to ggplot
df <- data.frame(group = lbls, value = slices)
#plotting by converting a stacked bar graph into a polar coordinate circle to act as a pie chart
bp<- ggplot(df, aes(x="", y=value, fill=group))+ geom_bar(width = 1, stat = "identity")+ scale_fill_manual(values = c("q vs. fer 3102" = "orange", "q vs. inf 3224" = "navy", "q vs. for 2987" = "yellow", "fer vs. inf 838" = "red", "fer vs. for 718" = "green", "inf vs. for 553" = "blue"))
pie <- bp + coord_polar("y", start=45) + blank_theme + theme(axis.text.x=element_blank()) + geom_text(aes(x = 1.8, label = c("q vs. fer 3104", "q vs. inf 3224", "q vs. for 2987", "fer vs. inf 838", "fer vs. for 718", "inf vs. for 553")), size=3, position = position_stack(vjust = 0.5))
pie
#initially filtering the same way as above
qfert <- filter(a, queenfertile_p<0.05 & abs(queenfertile_fold)>2)
qfert <- mutate(qfert) %>% select(ID)
qinfert <- filter(a, queeninfertile_p<0.05 & abs(queeninfertile_fold)>2)
qinfert <- mutate(qinfert) %>% select(ID)
qfor <- filter(a, queenforager_p<0.05 & abs(queenforager_fold)>2)
qfor <- mutate(qfor) %>% select(ID)
fertinfert <- filter(a, fertileinfertile_p<0.05 & abs(fertileinfertile_fold)>2)
fertinfert <- mutate(fertinfert) %>% select(ID)
fertfor <- filter(a, fertileforager_p<0.05 & abs(fertileforager_fold)>2)
fertfor <- mutate(fertfor) %>% select(ID)
infertfor <- filter(a, infertileforager_p<0.05 & abs(infertileforager_fold)>2)
infertfor <- mutate(infertfor) %>% select(ID)
# created sets for each pairwise comparison excluding any genes found in one of the other sets
q_fert <- dplyr::setdiff(qfert, union(qinfert, qfor, fertinfert, fertfor, infertfor))
q_infert <- dplyr::setdiff(qinfert, union(qfert, qfor, fertinfert, fertfor, infertfor))
q_for <- dplyr::setdiff(qfor, union(qfert, qinfert, fertinfert, fertfor, infertfor))
fert_infert <- dplyr::setdiff(fertinfert, union(qfert, qinfert, qfor, fertfor, infertfor))
fert_for <- dplyr::setdiff(fertfor, union(qfert, qinfert, qfor, fertinfert, infertfor))
infert_for <- dplyr::setdiff(infertfor, union(qfert, qinfert, qfor, fertinfert, fertfor))
#defining two vectors, one containing the number of genes filtered into each category, and one containing labels
slices <- c(nrow(q_fert), nrow(q_infert), nrow(q_for), nrow(fert_infert), nrow(fert_for), nrow(infert_for))
lbls <- c("q vs. fer", "q vs. inf", "q vs. for", "fer vs. inf", "fer vs. for", "inf vs. for")
lbls <- paste(lbls, slices) # add percents to labels
#creating a small data frame from the vectors above to hand to ggplot
df <- data.frame(group = lbls, value = slices)
#plotting by converting a stacked bar graph into a polar coordinate circle to act as a pie chart
bp<- ggplot(df, aes(x="", y=value, fill=group))+ geom_bar(width = 1, stat = "identity")+ scale_fill_manual(values = c("q vs. fer 651" = "orange", "q vs. inf 624" = "navy", "q vs. for 459" = "yellow", "fer vs. inf 138" = "red", "fer vs. for 152" = "green", "inf vs. for 141" = "blue"))
pie <- bp + coord_polar("y", start=45) + blank_theme + theme(axis.text.x=element_blank()) + geom_text(aes(x = 1.8, label = c("q vs. fer 651", "q vs. inf 625", "q vs. for 459", "fer vs. inf 138", "fer vs. for 152", "inf vs. for 141")), size=3, position = position_stack(vjust = 0.5))
pie
# filtering for significant upregulation in all pairwise comparisons for the set of comparisons involving the caste in question based on both p value and fc
q_up <- filter(a, queenfertile_p<0.05 & queenfertile_fold>2 & queeninfertile_p<0.05 & queeninfertile_fold>2 & queenforager_p<0.05 & queenforager_fold>2)
fert_up <- filter(a, queenfertile_p<0.05 & queenfertile_fold < -2 & fertileinfertile_p<0.05 & fertileinfertile_fold>2 & fertileforager_p<0.05 & fertileforager_fold>2)
infert_up <- filter(a, fertileinfertile_p<0.05 & fertileinfertile_fold < -2 & queeninfertile_p<0.05 & queeninfertile_fold < -2 & infertileforager_p<0.05 & infertileforager_fold>2)
for_up <- filter(a, infertileforager_p<0.05 & infertileforager_fold < -2 & fertileforager_p<0.05 & fertileforager_fold < -2 & queenforager_p<0.05 & queenforager_fold < -2)
###
slices <- c(nrow(q_up), nrow(fert_up), nrow(infert_up), nrow(for_up))
lbls <- c("q", "fer", "inf", "for")
lbls <- paste(lbls, slices) # add percents to labels
df <- data.frame(group = lbls, value = slices)
bp<- ggplot(df, aes(x="", y=value, fill=group))+ geom_bar(width = 1, stat = "identity")+ scale_fill_manual(values = c("q 380" = "blue", "fer 31" = "red", "inf 23" = "green", "for 16" = "purple"))
pie <- bp + coord_polar("y", start=45) + blank_theme + theme(axis.text.x=element_blank()) + geom_text(aes(x = 1.6, label = c("q 380", "fer 31", "inf 23", "for 16")), size=3, position = position_stack(vjust = 0.5))
pie
# I am filtering for p<0.05 and fc>2 for each pairwise comparison in both "directions", then editing the set down to only contain the column list of gene IDs.
qfert <- filter(a, queenfertile_p<0.05 & queenfertile_fold>2)
qfert <- mutate(qfert) %>% select(ID)
fertq <- filter(a, queenfertile_p<0.05 & queenfertile_fold < -2)
fertq<- mutate(fertq) %>% select(ID)
qinfert <- filter(a, queeninfertile_p<0.05 & queeninfertile_fold>2)
qinfert <- mutate(qinfert) %>% select(ID)
infertq <- filter(a, queeninfertile_p<0.05 & queeninfertile_fold < -2)
infertq <- mutate(infertq) %>% select(ID)
qfor <- filter(a, queenforager_p<0.05 & queenforager_fold>2)
qfor <- mutate(qfor) %>% select(ID)
forq <- filter(a, queenforager_p<0.05 & queenforager_fold < -2)
forq <- mutate(forq) %>% select(ID)
fertinfert <- filter(a, fertileinfertile_p<0.05 & fertileinfertile_fold>2)
fertinfert <- mutate(fertinfert) %>% select(ID)
infertfert <- filter(a, fertileinfertile_p<0.05 & fertileinfertile_fold < -2)
infertfert <- mutate(infertfert) %>% select(ID)
fertfor <- filter(a, fertileforager_p<0.05 & fertileforager_fold>2)
fertfor <- mutate(fertfor) %>% select(ID)
forfert <- filter(a, fertileforager_p<0.05 & fertileforager_fold < -2)
forfert <- mutate(forfert) %>% select(ID)
infertfor <- filter(a, infertileforager_p<0.05 & infertileforager_fold>2)
infertfor <- mutate(infertfor) %>% select(ID)
forinfert <- filter(a, infertileforager_p<0.05 & infertileforager_fold < -2)
forinfert <- mutate(forinfert) %>% select(ID)
#Now I am filtering for the subsets of these gene lists that only appear within that selected comparison and not in any of the others, using dplyr's setdiff function.
q_fert <- dplyr::setdiff(qfert, union(fertq, qinfert, infertq, qfor, forq, fertinfert, infertfert, fertfor, forfert, infertfor, forinfert))
fert_q <- dplyr::setdiff(fertq, union(qfert, qinfert, infertq, qfor, forq, fertinfert, infertfert, fertfor, forfert, infertfor, forinfert))
q_infert <- dplyr::setdiff(qinfert, union(qfert, fertq, infertq, qfor, forq, fertinfert, infertfert, fertfor, forfert, infertfor, forinfert))
infert_q <- dplyr::setdiff(infertq, union(qfert, fertq, qinfert, qfor, forq, fertinfert, infertfert, fertfor, forfert, infertfor, forinfert))
q_for <- dplyr::setdiff(qfor, union(qfert, fertq, qinfert, infertq, forq, fertinfert, infertfert, fertfor, forfert, infertfor, forinfert))
for_q <- dplyr::setdiff(forq, union(qfert, fertq, qinfert, infertq, qfor, fertinfert, infertfert, fertfor, forfert, infertfor, forinfert))
fert_infert <- dplyr::setdiff(fertinfert, union(qfert, fertq, qinfert, infertq, qfor, forq, infertfert, fertfor, forfert, infertfor, forinfert))
infert_fert <- dplyr::setdiff(infertfert, union(qfert, fertq, qinfert, infertq, qfor, forq, fertinfert, fertfor, forfert, infertfor, forinfert))
fert_for <- dplyr::setdiff(fertfor, union(qfert, fertq, qinfert, infertq, qfor, forq, fertinfert, infertfert, forfert, infertfor, forinfert))
for_fert <- dplyr::setdiff(forfert, union(qfert, fertq, qinfert, infertq, qfor, forq, fertinfert, infertfert, fertfor, infertfor, forinfert))
infert_for <- dplyr::setdiff(infertfor, union(qfert, fertq, qinfert, infertq, qfor, forq, fertinfert, infertfert, fertfor, forfert, forinfert))
for_infert <- dplyr::setdiff(forinfert, union(qfert, fertq, qinfert, infertq, qfor, forq, fertinfert, infertfert, fertfor, forfert, infertfor))
#This will be the row in the table showing the total genes that filtered as signficant for each pairwise comparison
#Simultaneously, I am reording the gene sets to match the order in the published table
Total <- c(nrow(qfert), nrow(qinfert), nrow(qfor), nrow(fertq), nrow(fertinfert), nrow(fertfor), nrow(infertfert), nrow(infertq), nrow(infertfor),nrow(forq), nrow(forfert), nrow(forinfert))
#This will be the row in the table showing just the genes uniquely filtering into each pairwise comparison
Pair_specific <- c(nrow(q_fert), nrow(q_infert), nrow(q_for), nrow(fert_q), nrow(fert_infert), nrow(fert_for), nrow(infert_fert), nrow(infert_q), nrow(infert_for),nrow(for_q), nrow(for_fert), nrow(for_infert))
#This will be the the row showing what percent of each gene set is unique to that comparison by dividing one vector by the other
Pair_specific_percent <- signif(Pair_specific/Total, 2)
Pair_specific_percent <- Pair_specific_percent * 100
Pair_specific_percent <- paste(Pair_specific_percent, " %", sep="")
#These vectors match the labels used in the published table
Focal_caste <- c("Queen", "", "", "Fertile worker", "", "", "Infertile worker", "", "", "Forager", "", "")
Comparison <- c("fer", "inf", "fer", "q", "inf", "for", "q", "fer", "for", "q", "fer", "inf")
table1 <- data.frame(Focal_caste, Comparison, Total, Pair_specific, Pair_specific_percent)
#kable function produces a pretty table
knitr::kable(table1)
| Focal_caste | Comparison | Total | Pair_specific | Pair_specific_percent |
|---|---|---|---|---|
| Queen | fer | 1055 | 566 | 54 % |
| inf | 876 | 376 | 43 % | |
| fer | 968 | 462 | 48 % | |
| Fertile worker | q | 1728 | 1717 | 99 % |
| inf | 273 | 193 | 71 % | |
| for | 236 | 170 | 72 % | |
| Infertile worker | q | 306 | 118 | 39 % |
| fer | 2043 | 667 | 33 % | |
| for | 194 | 127 | 65 % | |
| Forager | q | 1753 | 416 | 24 % |
| fer | 221 | 57 | 26 % | |
| inf | 144 | 77 | 53 % |